Import Libraries¶

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import sys
import seaborn as sns
import plotly.express as px # graphing interactive map from data
from plotly import tools
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
sys.setrecursionlimit(10000000)

# Render our plots inline
%matplotlib inline
# Make the graphs a bit prettier, and bigger
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 7)

Import Data¶

In [2]:
GTDB_USA_all = pd.read_csv('GTDB_USA.csv', header=0)
GTDB_USA_all
Out[2]:
eventid iyear imonth iday approxdate extended resolution country country_txt region ... addnotes scite1 scite2 scite3 dbsource INT_LOG INT_IDEO INT_MISC INT_ANY related
0 197001010002 1970 1 1 NaN 0 NaN 217 United States 1 ... The Cairo Chief of Police, William Petersen, r... "Police Chief Quits," Washington Post, January... "Cairo Police Chief Quits; Decries Local 'Mili... Christopher Hewitt, "Political Violence and Te... Hewitt Project -9 -9 0 -9 NaN
1 197001020002 1970 1 2 NaN 0 NaN 217 United States 1 ... Damages were estimated to be between $20,000-$... Committee on Government Operations United Stat... Christopher Hewitt, "Political Violence and Te... NaN Hewitt Project -9 -9 0 -9 NaN
2 197001020003 1970 1 2 NaN 0 NaN 217 United States 1 ... The New Years Gang issue a communiqué to a loc... Tom Bates, "Rads: The 1970 Bombing of the Army... David Newman, Sandra Sutherland, and Jon Stewa... The Wisconsin Cartographers' Guild, "Wisconsin... Hewitt Project 0 0 0 0 NaN
3 197001030001 1970 1 3 NaN 0 NaN 217 United States 1 ... Karl Armstrong's girlfriend, Lynn Schultz, dro... Committee on Government Operations United Stat... Tom Bates, "Rads: The 1970 Bombing of the Army... David Newman, Sandra Sutherland, and Jon Stewa... Hewitt Project 0 0 0 0 NaN
4 197001050001 1970 1 1 NaN 0 NaN 217 United States 1 ... NaN NaN NaN NaN PGIS 0 0 0 0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3116 202012130030 2020 12 13 NaN 0 NaN 217 United States 1 ... The assailant targeted this church three separ... "Black leaders air fears following church fire... "Suspicious fire at Black church being investi... NaN START Primary Collection -9 -9 0 -9 NaN
3117 202012130044 2020 12 13 NaN 0 NaN 217 United States 1 ... NaN "A gunman is dead after a shooting at a New Yo... "Church-shoot guy riled over Latin America," D... "NYC cathedral gunman's note says he planned t... START Primary Collection -9 -9 0 -9 NaN
3118 202012150028 2020 12 15 NaN 0 NaN 217 United States 1 ... The assailant targeted this church three separ... "Black leaders air fears following church fire... "Suspicious fire at Black church being investi... NaN START Primary Collection -9 -9 0 -9 NaN
3119 202012250003 2020 12 25 NaN 0 NaN 217 United States 1 ... There is doubt that this incident meets terror... "Bomber to neighbor: The world is 'never going... "FBI: Nashville bomber driven by conspiracies,... "Behind the Nashville Bombing, a Conspiracy Th... START Primary Collection -9 -9 0 -9 NaN
3120 202012280022 2020 12 28 NaN 0 NaN 217 United States 1 ... The assailant targeted this church three separ... "Black leaders air fears following church fire... "Suspicious fire at Black church being investi... NaN START Primary Collection -9 -9 0 -9 NaN

3121 rows × 135 columns

Select Variables of Interest¶

In [3]:
GTDB_USA = GTDB_USA_all[['iyear', 'imonth', 'iday', 'provstate','city', 'crit1', 'crit2', 'crit3', 'suicide',
                    'attacktype1', 'attacktype1_txt', 'targtype1', 'targsubtype1', 'targtype1_txt', 'gname', 'nkill', 'nwound', 
                    'weaptype1', 'weaptype1_txt', 'success']]
GTDB_USA
Out[3]:
iyear imonth iday provstate city crit1 crit2 crit3 suicide attacktype1 attacktype1_txt targtype1 targsubtype1 targtype1_txt gname nkill nwound weaptype1 weaptype1_txt success
0 1970 1 1 Illinois Cairo 1 1 1 0 2 Armed Assault 3 22.0 Police Black Nationalists 0.0 0.0 5 Firearms 1
1 1970 1 2 California Oakland 1 1 1 0 3 Bombing/Explosion 21 107.0 Utilities Unknown 0.0 0.0 6 Explosives 1
2 1970 1 2 Wisconsin Madison 1 1 1 0 7 Facility/Infrastructure Attack 4 28.0 Military New Year's Gang 0.0 0.0 8 Incendiary 1
3 1970 1 3 Wisconsin Madison 1 1 1 0 7 Facility/Infrastructure Attack 2 21.0 Government (General) New Year's Gang 0.0 0.0 8 Incendiary 1
4 1970 1 1 Wisconsin Baraboo 1 1 0 0 3 Bombing/Explosion 4 27.0 Military Weather Underground, Weathermen 0.0 0.0 6 Explosives 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3116 2020 12 13 Massachusetts Springfield 1 1 1 0 7 Facility/Infrastructure Attack 15 86.0 Religious Figures/Institutions White supremacists/nationalists 0.0 0.0 8 Incendiary 1
3117 2020 12 13 New York New York City 1 1 1 1 6 Hostage Taking (Kidnapping) 15 86.0 Religious Figures/Institutions Anti-globalization extremists 1.0 0.0 5 Firearms 0
3118 2020 12 15 Massachusetts Springfield 1 1 1 0 7 Facility/Infrastructure Attack 15 86.0 Religious Figures/Institutions White supremacists/nationalists 0.0 0.0 8 Incendiary 1
3119 2020 12 25 Tennessee Nashville 0 1 1 1 3 Bombing/Explosion 1 7.0 Business Conspiracy theory extremists 1.0 3.0 6 Explosives 1
3120 2020 12 28 Massachusetts Springfield 1 1 1 0 7 Facility/Infrastructure Attack 15 86.0 Religious Figures/Institutions White supremacists/nationalists 0.0 0.0 8 Incendiary 1

3121 rows × 20 columns

In [4]:
GTDB_USA.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3121 entries, 0 to 3120
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   iyear            3121 non-null   int64  
 1   imonth           3121 non-null   int64  
 2   iday             3121 non-null   int64  
 3   provstate        3121 non-null   object 
 4   city             3121 non-null   object 
 5   crit1            3121 non-null   int64  
 6   crit2            3121 non-null   int64  
 7   crit3            3121 non-null   int64  
 8   suicide          3121 non-null   int64  
 9   attacktype1      3121 non-null   int64  
 10  attacktype1_txt  3121 non-null   object 
 11  targtype1        3121 non-null   int64  
 12  targsubtype1     2998 non-null   float64
 13  targtype1_txt    3121 non-null   object 
 14  gname            3121 non-null   object 
 15  nkill            3051 non-null   float64
 16  nwound           3031 non-null   float64
 17  weaptype1        3121 non-null   int64  
 18  weaptype1_txt    3121 non-null   object 
 19  success          3121 non-null   int64  
dtypes: float64(3), int64(11), object(6)
memory usage: 487.8+ KB
In [5]:
GTDB_USA.shape
Out[5]:
(3121, 20)
In [6]:
# Rename columns 
GTDB_USA.rename(columns={'iyear':'Year','imonth':'Month','iday':'Day', 'provstate':'State','attacktype1':'Attack_Type', 'attacktype1_txt': 'Attack',
                        'targtype1_txt':'Target','nkill':'Killed', 'nwound':'Wounded','gname':'Group','targtype1':'Target_type',
                        'weaptype1':'Weapon_type', 'weaptype1_txt':'Weapon'},inplace=True)
GTDB_USA.head()
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1627560304.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[6]:
Year Month Day State city crit1 crit2 crit3 suicide Attack_Type Attack Target_type targsubtype1 Target Group Killed Wounded Weapon_type Weapon success
0 1970 1 1 Illinois Cairo 1 1 1 0 2 Armed Assault 3 22.0 Police Black Nationalists 0.0 0.0 5 Firearms 1
1 1970 1 2 California Oakland 1 1 1 0 3 Bombing/Explosion 21 107.0 Utilities Unknown 0.0 0.0 6 Explosives 1
2 1970 1 2 Wisconsin Madison 1 1 1 0 7 Facility/Infrastructure Attack 4 28.0 Military New Year's Gang 0.0 0.0 8 Incendiary 1
3 1970 1 3 Wisconsin Madison 1 1 1 0 7 Facility/Infrastructure Attack 2 21.0 Government (General) New Year's Gang 0.0 0.0 8 Incendiary 1
4 1970 1 1 Wisconsin Baraboo 1 1 0 0 3 Bombing/Explosion 4 27.0 Military Weather Underground, Weathermen 0.0 0.0 6 Explosives 0
In [7]:
GTDB_USA['Weapon'].value_counts()
Out[7]:
Explosives                                                                     1441
Incendiary                                                                      968
Firearms                                                                        488
Melee                                                                            68
Unknown                                                                          39
Chemical                                                                         28
Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)      25
Biological                                                                       24
Sabotage Equipment                                                               18
Other                                                                            17
Fake Weapons                                                                      4
Radiological                                                                      1
Name: Weapon, dtype: int64
In [8]:
GTDB_USA['Group'].value_counts()
Out[8]:
Unknown                                          606
Anti-Abortion extremists                         234
Left-Wing Militants                              169
White supremacists/nationalists                  131
Fuerzas Armadas de Liberacion Nacional (FALN)    120
                                                ... 
Lebanese Man                                       1
Organization Alliance of Cuban Intransigence       1
Mormon Extremist                                   1
Nuclear Liberation Front                           1
Anti-globalization extremists                      1
Name: Group, Length: 244, dtype: int64
In [9]:
#GTDB_USA['Recode Group'] = GTDB_USA['Group']
#GTDB_USA['Recode Group'].replace(GTDB_USA['Recode Group'].value_counts()[:15].values, 'Other')
#GTDB_USA.loc[GTDB_USA["Recode Group"] == GTDB_USA['Recode Group'].value_counts()[:15]] = 'Other'
#GTDB_USA['Recode Group'].mask(GTDB_USA['Recode Group'] == GTDB_USA['Recode Group'].value_counts()[:15], 'Other', inplace=True)
#GTDB_USA. head()
In [10]:
#newGroups = GTDB_USA[['Group']]
#newGroups['total'] = newGroups.groupby(['Group']).size()
#newGroups
In [11]:
#top10 = GTDB_USA.nsmallest(10, 'Group')
#GTDB_USA['Recode_Group'] = np.where((GTDB_USA['Group'].eq(top10['Group'])),GTDB_USA['Group'],'Other')
In [12]:
#GTDB_USA['Recode_Group'].value_counts()
In [13]:
#GTDB_USA['success'].value_counts()
In [14]:
#GTDB_USA['Weapon_type'].value_counts()
In [15]:
#GTDB_USA['weapsubtype1'].value_counts()
In [16]:
#GTDB_USA['Target'].value_counts().head(20)
In [17]:
#GTDB_USA['Target_type'].value_counts()
In [18]:
#GTDB_USA['State'].value_counts()

Data Cleaning¶

Drop Puerto Rico and US Virgin Islands and unknown locations.

In [19]:
#GTDB_USA = GTDB_USA[GTDB_USA.State != ['Puerto Rico', 'U.S. Virgin Islands']]
GTDB_USA = GTDB_USA[GTDB_USA.State.isin(['Puerto Rico', 'U.S. Virgin Islands', 'Unknown']) == False]
In [20]:
GTDB_USA['State'].value_counts()
Out[20]:
California              632
New York                541
Florida                 176
Washington              119
Illinois                115
Texas                    89
District of Columbia     88
Oregon                   72
Massachusetts            68
Ohio                     58
Pennsylvania             50
Michigan                 50
New Jersey               50
Colorado                 48
Missouri                 46
Arizona                  44
Virginia                 43
Wisconsin                43
Maryland                 42
Georgia                  42
North Carolina           35
Tennessee                34
Louisiana                28
Minnesota                28
Indiana                  26
New Mexico               25
Nevada                   24
Nebraska                 24
Iowa                     24
Utah                     22
Connecticut              19
Alabama                  17
Oklahoma                 15
Kansas                   14
Idaho                    14
Mississippi              14
South Carolina           11
New Hampshire            11
South Dakota              9
Delaware                  8
Arkansas                  8
Montana                   7
Kentucky                  7
North Dakota              7
Vermont                   5
Hawaii                    5
Maine                     4
Wyoming                   3
Rhode Island              2
West Virginia             2
Alaska                    1
Name: State, dtype: int64
In [21]:
GTDB_USA.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2869 entries, 0 to 3120
Data columns (total 20 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Year          2869 non-null   int64  
 1   Month         2869 non-null   int64  
 2   Day           2869 non-null   int64  
 3   State         2869 non-null   object 
 4   city          2869 non-null   object 
 5   crit1         2869 non-null   int64  
 6   crit2         2869 non-null   int64  
 7   crit3         2869 non-null   int64  
 8   suicide       2869 non-null   int64  
 9   Attack_Type   2869 non-null   int64  
 10  Attack        2869 non-null   object 
 11  Target_type   2869 non-null   int64  
 12  targsubtype1  2751 non-null   float64
 13  Target        2869 non-null   object 
 14  Group         2869 non-null   object 
 15  Killed        2819 non-null   float64
 16  Wounded       2801 non-null   float64
 17  Weapon_type   2869 non-null   int64  
 18  Weapon        2869 non-null   object 
 19  success       2869 non-null   int64  
dtypes: float64(3), int64(11), object(6)
memory usage: 470.7+ KB

Check the data for missing values.

In [22]:
GTDB_USA.isna().sum()
Out[22]:
Year              0
Month             0
Day               0
State             0
city              0
crit1             0
crit2             0
crit3             0
suicide           0
Attack_Type       0
Attack            0
Target_type       0
targsubtype1    118
Target            0
Group             0
Killed           50
Wounded          68
Weapon_type       0
Weapon            0
success           0
dtype: int64

To get a better look at the missing data, we can calculate the percentage of missing data for each column.

In [23]:
#missing data
total = GTDB_USA.isnull().sum().sort_values(ascending=False)
percent = (GTDB_USA.isnull().sum()/GTDB_USA.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data.head(20)
Out[23]:
Total Percent
targsubtype1 118 0.041129
Wounded 68 0.023702
Killed 50 0.017428
Year 0 0.000000
Month 0 0.000000
Weapon 0 0.000000
Weapon_type 0 0.000000
Group 0 0.000000
Target 0 0.000000
Target_type 0 0.000000
Attack 0 0.000000
Attack_Type 0 0.000000
suicide 0 0.000000
crit3 0 0.000000
crit2 0 0.000000
crit1 0 0.000000
city 0 0.000000
State 0 0.000000
Day 0 0.000000
success 0 0.000000

We will drop all rows with N/A values.

In [24]:
GTDB_USA = GTDB_USA.dropna()
In [25]:
GTDB_USA['Weapon'].value_counts()
Out[25]:
Explosives                                                                     1133
Incendiary                                                                      909
Firearms                                                                        445
Melee                                                                            66
Chemical                                                                         25
Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)      25
Biological                                                                       23
Sabotage Equipment                                                               18
Unknown                                                                          17
Other                                                                            17
Fake Weapons                                                                      4
Radiological                                                                      1
Name: Weapon, dtype: int64
In [26]:
GTDB_USA.isna().sum()
Out[26]:
Year            0
Month           0
Day             0
State           0
city            0
crit1           0
crit2           0
crit3           0
suicide         0
Attack_Type     0
Attack          0
Target_type     0
targsubtype1    0
Target          0
Group           0
Killed          0
Wounded         0
Weapon_type     0
Weapon          0
success         0
dtype: int64
In [27]:
GTDB_USA.shape
Out[27]:
(2683, 20)

Exploratory Data Analysis¶

In [28]:
GTDB_USA.describe()
Out[28]:
Year Month Day crit1 crit2 crit3 suicide Attack_Type Target_type targsubtype1 Killed Wounded Weapon_type success
count 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000 2683.000000
mean 1988.062244 6.262766 15.305255 0.981737 0.983228 0.992546 0.007082 4.400671 6.766306 40.430116 1.449124 9.773761 6.658964 0.828550
std 17.415874 3.347272 9.168060 0.133926 0.128441 0.086032 0.083870 2.210432 5.754176 29.868452 38.167953 298.031224 1.591475 0.376972
min 1970.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 0.000000 0.000000 1.000000 0.000000
25% 1972.000000 3.000000 7.000000 1.000000 1.000000 1.000000 0.000000 3.000000 2.000000 14.000000 0.000000 0.000000 6.000000 1.000000
50% 1982.000000 6.000000 15.000000 1.000000 1.000000 1.000000 0.000000 3.000000 5.000000 40.000000 0.000000 0.000000 6.000000 1.000000
75% 2001.000000 9.000000 23.000000 1.000000 1.000000 1.000000 0.000000 7.000000 14.000000 68.000000 0.000000 0.000000 8.000000 1.000000
max 2020.000000 12.000000 31.000000 1.000000 1.000000 1.000000 1.000000 9.000000 22.000000 110.000000 1385.000000 10878.000000 13.000000 1.000000
In [29]:
#GTDB_USA['Group'].value_counts().head(20)
In [30]:
mask = np.zeros_like(GTDB_USA.corr())
mask[np.triu_indices_from(mask)] = True
plt.figure(figsize = (26,16))
sns.heatmap(GTDB_USA.corr(), mask=mask, annot=True, cmap="RdYlGn", linewidths=.75)
Out[30]:
<AxesSubplot:>

Number Of Terrorist Activities By Attack Type¶

In [31]:
# Number Of Terrorist Activities By Attack Type
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["Attack_Type"], data=GTDB_USA, palette='viridis_r', order = GTDB_USA['Attack_Type'].value_counts().index)
plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Attack Type')
plt.xticks([0,1,2,3,4,5,6,7], ['FACILITY / \nINFRASTRUCTURE \nATTACK','BOMBING/\nEXPLOSION','ARMED ASSAULT','ASSASSINATION',
                                'HOSTAGE TAKING\n(BARRICADE INCIDENT)','UNARMED ASSAULT', 'HOSTAGE TAKING\n(KIDNAPPING)',
                                'HIJACKING'])
plt.title('Number Of Terrorist Activities By Attack Type')
plt.show()
In [32]:
GTDB_USA['Attack_Type'].value_counts()
Out[32]:
3    1110
7     928
2     350
1     118
8      88
5      48
6      20
4      18
9       3
Name: Attack_Type, dtype: int64

Weapon Type¶

In [33]:
# Terrorist Activities By Weapon Type
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["Weapon_type"], data=GTDB_USA, palette='viridis_r', order = GTDB_USA['Weapon_type'].value_counts().index)
plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Weapon Type')
plt.xticks([0,1,2,3,4], ['Incendiary','Explosives','Firearms','Melee','Chemical'])
plt.title('Terrorist Activities By Weapon Type')
plt.show()
In [34]:
GTDB_USA['Weapon_type'].value_counts()
Out[34]:
6     1133
8      909
5      445
9       66
2       25
10      25
1       23
11      18
13      17
12      17
7        4
3        1
Name: Weapon_type, dtype: int64

Number Of Terrorist Activities Each Year¶

In [35]:
#Number Of Terrorist Activities Each Year
years = GTDB_USA["Year"].value_counts().to_frame()
years = years.rename(columns={'Year':'count'})
years['year'] = years.index
years.reset_index(inplace=True)
years = years.drop(columns='index')
years['killed'] = GTDB_USA.groupby(['Year']).Killed.sum()
years['wounded'] = GTDB_USA.groupby(['Year']).Wounded.sum()
years
Out[35]:
count year killed wounded
0 437 1970 NaN NaN
1 223 1971 NaN NaN
2 119 1975 NaN NaN
3 103 1977 NaN NaN
4 103 2020 NaN NaN
5 80 1976 NaN NaN
6 73 2018 NaN NaN
7 72 2019 NaN NaN
8 68 2016 NaN NaN
9 66 1974 NaN NaN
10 63 1972 NaN NaN
11 62 2017 NaN NaN
12 59 1995 NaN NaN
13 54 1984 NaN NaN
14 54 1973 NaN NaN
15 53 1999 NaN NaN
16 52 1994 NaN NaN
17 50 1982 NaN NaN
18 49 1978 NaN NaN
19 48 1980 NaN NaN
20 47 1981 NaN NaN
21 45 2001 NaN NaN
22 43 1979 NaN NaN
23 40 2015 NaN NaN
24 40 1997 NaN NaN
25 39 2000 NaN NaN
26 36 1983 NaN NaN
27 34 2003 NaN NaN
28 34 1996 NaN NaN
29 34 1985 NaN NaN
30 33 2002 NaN NaN
31 31 1992 NaN NaN
32 30 1989 NaN NaN
33 29 2014 NaN NaN
34 27 1990 NaN NaN
35 26 1986 NaN NaN
36 24 1998 NaN NaN
37 23 1991 NaN NaN
38 23 1987 NaN NaN
39 21 2005 NaN NaN
40 20 2012 NaN NaN
41 19 2013 NaN NaN
42 19 2010 NaN NaN
43 17 2008 NaN NaN
44 17 1988 NaN NaN
45 11 2009 NaN NaN
46 11 2007 NaN NaN
47 9 2011 NaN NaN
48 9 2004 NaN NaN
49 4 2006 NaN NaN
In [36]:
# Number Of Terrorist Activities Each Year
plt.subplots(figsize=(15,5))
sns.lineplot(data=years, x="year", y='count')
plt.xticks(rotation=90)
plt.title('Number Of Terrorist Activities Each Year')
plt.show()
In [37]:
years2 = pd.DataFrame(GTDB_USA.groupby(['Year']).Killed.sum())
years2['wounded'] = pd.DataFrame(GTDB_USA.groupby(['Year']).Wounded.sum())
years2['count'] = GTDB_USA["Year"].value_counts().to_frame()
#years2['year'] = years2.index
years2.reset_index(inplace=True)
years2['count'] = years2['count'].astype(float)
years2 = years2[years2.Year > 1971]
#years2.drop(years2[years2['Year']] == 1970)
years2
Out[37]:
Year Killed wounded count
2 1972 9.0 22.0 63.0
3 1973 45.0 33.0 54.0
4 1974 16.0 50.0 66.0
5 1975 21.0 146.0 119.0
6 1976 4.0 41.0 80.0
7 1977 4.0 10.0 103.0
8 1978 4.0 7.0 49.0
9 1979 14.0 36.0 43.0
10 1980 15.0 20.0 48.0
11 1981 7.0 12.0 47.0
12 1982 7.0 31.0 50.0
13 1983 7.0 5.0 36.0
14 1984 3.0 780.0 54.0
15 1985 3.0 12.0 34.0
16 1986 0.0 35.0 26.0
17 1987 1.0 1.0 23.0
18 1988 0.0 0.0 17.0
19 1989 2.0 14.0 30.0
20 1990 5.0 7.0 27.0
21 1991 26.0 31.0 23.0
22 1992 2.0 3.0 31.0
23 1994 8.0 16.0 52.0
24 1995 178.0 738.0 59.0
25 1996 2.0 84.0 34.0
26 1997 2.0 19.0 40.0
27 1998 4.0 2.0 24.0
28 1999 20.0 40.0 53.0
29 2000 8.0 10.0 39.0
30 2001 3014.0 21894.0 45.0
31 2002 4.0 11.0 33.0
32 2003 0.0 0.0 34.0
33 2004 0.0 0.0 9.0
34 2005 0.0 0.0 21.0
35 2006 0.0 9.0 4.0
36 2007 0.0 0.0 11.0
37 2008 2.0 12.0 17.0
38 2009 21.0 50.0 11.0
39 2010 4.0 17.0 19.0
40 2011 0.0 2.0 9.0
41 2012 7.0 7.0 20.0
42 2013 21.0 420.0 19.0
43 2014 26.0 19.0 29.0
44 2015 54.0 59.0 40.0
45 2016 68.0 150.0 68.0
46 2017 98.0 948.0 62.0
47 2018 42.0 61.0 73.0
48 2019 53.0 115.0 72.0
49 2020 12.0 32.0 103.0
In [38]:
# Number of Terrorist Attacks Per Year
sns.barplot(data=years2, x='Year', y='count',palette=('viridis_r'))
plt.xticks(rotation=45)
#fig=plt.gcf()
#plt.set_size_inches(100,10)
plt.title('Number of Terrorist Attacks Per Year')
plt.show()
In [39]:
# Creating trace1
trace1 = go.Scatter(
                    x = years2.Year,
                    y = years2.Killed,
                    mode = "lines",
                    name = "Total Killed",
                    marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
                    text= years2.Killed)
# Creating trace2
trace2 = go.Scatter(
                    x = years2.Year,
                    y = years2.wounded,
                    mode = "lines",
                    name = "Total Wounded",
                    marker = dict(color = 'rgba(80, 26, 80, 0.8)'),
                    text= years2.wounded)

# Creating trace3
#trace3 = go.Scatter(
                    #x = years2.Year,
                    #y = years2.count,
                    #mode = "lines",
                    #name = "Total Attacks",
                    #marker = dict(color = 'rgba(255, 26, 80, 0.8)'),
                    #text= years2.count)

data = [trace1, trace2]
layout = dict(title = 'Total Casualties',
              xaxis= dict(title= 'Year',ticklen= 5,zeroline= False)
             )
fig = dict(data = data, layout = layout)
iplot(fig)

Number Of Terrorist Attacks in Each state¶

In [40]:
# Number Of Terrorist Activities in Each state
state_attacks = GTDB_USA["State"].value_counts().to_frame()
state_attacks = state_attacks.rename(columns={'State':'count'})
state_attacks['state'] = state_attacks.index
state_attacks.reset_index(inplace=True)
state_attacks = state_attacks.drop(columns='index')
#state_attacks.drop([46], axis=0, inplace = True) # Drop 'US. Virgin Islands'
#state_attacks.drop([16], axis=0, inplace = True) # Drop 'District of Columba'
In [41]:
state_attacks
Out[41]:
count state
0 579 California
1 499 New York
2 139 Florida
3 115 Washington
4 110 Illinois
5 89 Texas
6 83 District of Columbia
7 70 Oregon
8 64 Massachusetts
9 56 Ohio
10 50 Michigan
11 49 Pennsylvania
12 47 Colorado
13 46 Missouri
14 44 Arizona
15 44 New Jersey
16 41 Georgia
17 41 Wisconsin
18 41 Virginia
19 37 Maryland
20 34 North Carolina
21 34 Tennessee
22 28 Minnesota
23 28 Louisiana
24 25 Indiana
25 24 Nevada
26 24 New Mexico
27 22 Nebraska
28 22 Iowa
29 22 Utah
30 18 Connecticut
31 17 Alabama
32 15 Oklahoma
33 14 Mississippi
34 14 Kansas
35 12 Idaho
36 11 New Hampshire
37 11 South Carolina
38 9 South Dakota
39 8 Delaware
40 8 Arkansas
41 7 Montana
42 7 Kentucky
43 7 North Dakota
44 5 Hawaii
45 4 Vermont
46 3 Wyoming
47 2 West Virginia
48 2 Rhode Island
49 1 Alaska
50 1 Maine
In [42]:
# Number of Terrorist Attacks Per State
sns.barplot(x=GTDB_USA['State'].value_counts().values, y=GTDB_USA['State'].value_counts().index,palette=('viridis_r'))
plt.xticks(rotation=0)
fig=plt.gcf()
fig.set_size_inches(15,10)
plt.title('Number of Terrorist Attacks Per State')
plt.show()

California and New York have the highest number of terrorist attacks. This makes since as these are the 2 highest populated states. Later we will calculate the number of attacks per 100,000 people in each state, for a more accurate interpretation.

The Most Active Terror Groups¶

In [43]:
# The Most Active Terror Groups
sns.barplot(x=GTDB_USA['Group'].value_counts()[:10].values, y=GTDB_USA['Group'].value_counts()[:10].index,palette=('viridis_r'))
plt.xticks(rotation=0)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.title('Top 10 Most Active Terror Groups')
plt.show()
In [44]:
Top10Groups = GTDB_USA['Group'].value_counts()[:10].to_frame()
Top10Groups = Top10Groups.rename(columns={'Group':'count'})
Top10Groups['Group'] = Top10Groups.index
Top10Groups.reset_index(inplace=True)
Top10Groups = Top10Groups.drop(columns='index')
Top10Groups
Out[44]:
count Group
0 510 Unknown
1 233 Anti-Abortion extremists
2 167 Left-Wing Militants
3 127 White supremacists/nationalists
4 88 Fuerzas Armadas de Liberacion Nacional (FALN)
5 79 Black Nationalists
6 70 Animal Liberation Front (ALF)
7 70 Student Radicals
8 65 New World Liberation Front (NWLF)
9 65 Earth Liberation Front (ELF)
In [45]:
fig = px.pie(Top10Groups, values='count', names='Group',
             title='Top 10 Most Active Terror Groups',
             hover_data=['Group'])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

Most of the attacks are not affiliated with a group. However of the attacks that are, Anti-Abortion Extremists are the most prevalent.

Successful attacks¶

In [46]:
# Successful attacks
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["success"], data=GTDB_USA, palette='mako_r', order = GTDB_USA['success'].value_counts().index)
#plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Success')
plt.xticks([0,1], ['Successful','Unsuccessful'])
plt.title('Successful attacks')
plt.show()
In [47]:
GTDB_USA['success'].value_counts()
Out[47]:
1    2223
0     460
Name: success, dtype: int64

Terrorist Attacks by Target¶

In [48]:
TargetPlotData = GTDB_USA[['Target', 'Killed', 'Wounded']]
TargetPlotData
Out[48]:
Target Killed Wounded
0 Police 0.0 0.0
1 Utilities 0.0 0.0
2 Military 0.0 0.0
3 Government (General) 0.0 0.0
4 Military 0.0 0.0
... ... ... ...
3116 Religious Figures/Institutions 0.0 0.0
3117 Religious Figures/Institutions 1.0 0.0
3118 Religious Figures/Institutions 0.0 0.0
3119 Business 1.0 3.0
3120 Religious Figures/Institutions 0.0 0.0

2683 rows × 3 columns

In [49]:
TargetPlotData['Target'].value_counts()
Out[49]:
Business                          643
Private Citizens & Property       404
Government (General)              306
Abortion Related                  256
Religious Figures/Institutions    230
Police                            188
Educational Institution           169
Military                          134
Government (Diplomatic)           112
Journalists & Media                55
Utilities                          52
Airports & Aircraft                48
NGO                                25
Transportation                     16
Telecommunication                  12
Tourists                           10
Terrorists/Non-State Militia        9
Violent Political Party             6
Food or Water Supply                3
Maritime                            3
Other                               2
Name: Target, dtype: int64
In [50]:
# terrorist attack targets grouped in categories
target_codes = []

for attack in TargetPlotData['Target'].values:
    if attack in ['Business', 'Journalists & Media', 'NGO']:
        target_codes.append(1)
    elif attack in ['Government (General)', 'Government (Diplomatic)']:
        target_codes.append(2)
    elif attack == 'Abortion Related':
        target_codes.append(4)
    elif attack == 'Educational Institution':
        target_codes.append(5)
    elif attack == 'Police':
        target_codes.append(6)
    elif attack == 'Military':
        target_codes.append(7)
    elif attack == 'Religious Figures/Institutions':
        target_codes.append(8)
    elif attack in ['Airports & Aircraft', 'Maritime', 'Transportation']:
        target_codes.append(9)
    elif attack in ['Food or Water Supply', 'Telecommunication', 'Utilities']:
        target_codes.append(10)
    else:
        target_codes.append(3)

TargetPlotData['Target'] = target_codes
target_categories = ['Business', 'Government', 'Individuals', 'Healthcare', 'Education',
                     'Police', 'Military', 'Religion', 'Transportation', 'Infrastructure']
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2308851232.py:26: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [51]:
TargetPlotData['Target'].value_counts()
Out[51]:
1     723
3     431
2     418
4     256
8     230
6     188
5     169
7     134
10     67
9      67
Name: Target, dtype: int64
In [52]:
# terrorist attacks by target
target_count = np.asarray(TargetPlotData.groupby('Target').Target.count())
target_percent = np.round(target_count / sum(target_count) * 100, 2)

# terrorist attack fatalities by target
target_fatality = np.asarray(TargetPlotData.groupby('Target')['Killed'].sum())
target_yaxis = np.asarray([2.75, 3, 3.75, 1.1, 1.8, 2, 1.5, 1.75, 1.45])

# terrorist attack injuries by target
target_injury = np.asarray(TargetPlotData.groupby('Target')['Wounded'].sum())
target_xaxis = np.log10(target_injury)

target_text = []
for i in range(0, 9):
    target_text.append(target_categories[i] + ' (' + target_percent[i].astype(str) 
                       + '%)<br>' + target_fatality[i].astype(str) + ' Killed, '
                       + target_injury[i].astype(str) + ' Injured')

data = [go.Scatter(
        x = target_injury,
        y = target_fatality,
        text = target_text,
        mode = 'markers',
        hoverinfo = 'text',
        marker = dict(
            size = target_count / 6.5,
            opacity = 0.9,
            color = 'rgb(240, 140, 45)')
        )]

layout = go.Layout(
         title = 'Terrorist Attacks by Target in United States (1970-2022)',
         xaxis = dict(
             title = 'Injuries',
             type = 'log',
             range = [1.36, 3.25],
             tickmode = 'auto',
             nticks = 2,
             showline = True,
             showgrid = False
         ),
         yaxis = dict(
             title = 'Fatalities',
             type = 'log',
             range = [0.59, 3.45],
             tickmode = 'auto',
             nticks = 4,
             showline = True,
             showgrid = False)
         )

annotations = []
for i in range(0, 9):
    annotations.append(dict(x=target_xaxis[i], y=target_yaxis[i],
                            xanchor='auto', yanchor='auto',
                            text=target_categories[i], showarrow=True))
layout['annotations'] = annotations

figure = dict(data = data, layout = layout)
iplot(figure)
  • 26.45% of attacks take place in businesses
  • 15%.25% in Government buildings
  • 16.27% against Individuals(i.e. public places)

Terrorist Attacks by Weapon¶

In [53]:
WeaponPlotData = GTDB_USA[['Weapon', 'Killed', 'Wounded']]
WeaponPlotData
Out[53]:
Weapon Killed Wounded
0 Firearms 0.0 0.0
1 Explosives 0.0 0.0
2 Incendiary 0.0 0.0
3 Incendiary 0.0 0.0
4 Explosives 0.0 0.0
... ... ... ...
3116 Incendiary 0.0 0.0
3117 Firearms 1.0 0.0
3118 Incendiary 0.0 0.0
3119 Explosives 1.0 3.0
3120 Incendiary 0.0 0.0

2683 rows × 3 columns

In [54]:
WeaponPlotData['Weapon'].value_counts()
Out[54]:
Explosives                                                                     1133
Incendiary                                                                      909
Firearms                                                                        445
Melee                                                                            66
Chemical                                                                         25
Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)      25
Biological                                                                       23
Sabotage Equipment                                                               18
Unknown                                                                          17
Other                                                                            17
Fake Weapons                                                                      4
Radiological                                                                      1
Name: Weapon, dtype: int64
In [55]:
# terrorist attack weapons grouped in categories
weapon_codes = []

for attack in WeaponPlotData['Weapon'].values:
    if attack in ['Explosives', 'Sabotage Equipment']:
        weapon_codes.append(1)
    elif attack == 'Incendiary':
        weapon_codes.append(2)
    elif attack in ['Firearms', 'Fake Weapons']:
        weapon_codes.append(3)
    elif attack == 'Melee':
        weapon_codes.append(5)
    elif attack == 'Biological':
        weapon_codes.append(6)
    elif attack in ['Chemical', 'Radiological']:
        weapon_codes.append(7)
    elif 'Vehicle' in attack:
        weapon_codes.append(8)
    else:
        weapon_codes.append(4)

WeaponPlotData['Weapon'] = weapon_codes
weapon_categories = ['Explosives', 'Flammables', 'Firearms', 'Miscellaneous',
                     'Knives', 'Bacteria/Viruses', 'Chemicals', 'Vehicles']
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2231559974.py:22: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [56]:
# terrorist attacks by weapon
weapon_count = np.asarray(WeaponPlotData.groupby('Weapon').Weapon.count())
weapon_percent = np.round(weapon_count / sum(weapon_count) * 100, 2)

# terrorist attack fatalities by weapon
weapon_fatality = np.asarray(WeaponPlotData.groupby('Weapon')['Killed'].sum())
weapon_yaxis = np.asarray([2, 1.75, 2.85, 0.85, 1.35, 0.83, 0.835, 3.45])

# terrorist attack injuries by weapon
weapon_injury = np.asarray(WeaponPlotData.groupby('Weapon')['Wounded'].sum())
weapon_xaxis = np.log10(weapon_injury)

weapon_text = []
for i in range(0, 8):
    weapon_text.append(weapon_categories[i] + ' (' + weapon_percent[i].astype(str) 
                       + '%)<br>' + weapon_fatality[i].astype(str) + ' Killed, '
                       + weapon_injury[i].astype(str) + ' Injured')

weapon_fatality[6] = 7
    
data = [go.Scatter(
        x = weapon_injury,
        y = weapon_fatality,
        text = weapon_text,
        mode = 'markers',
        hoverinfo = 'text',
        marker = dict(
            size = (weapon_count + 50) / 10,
            opacity = 0.9,
            color = 'rgb(240, 140, 45)')
        )]

layout = go.Layout(
         title = 'Terrorist Attacks by Weapon in United States (1970-2022)',
         xaxis = dict(
             title = 'Injuries',
             type = 'log',
             range = [0.45, 3.51],
             tickmode = 'auto',
             nticks = 4,
             showline = True,
             showgrid = False
         ),
         yaxis = dict(
             title = 'Fatalities',
             type = 'log',
             range = [0.65, 3.33],
             tickmode = 'auto',
             nticks = 3,
             showline = True,
             showgrid = False)
         )

annotations = []
for i in range(0, 8):
    annotations.append(dict(x=weapon_xaxis[i], y=weapon_yaxis[i],
                            xanchor='auto', yanchor='auto',
                            text=weapon_categories[i], showarrow=True))
layout['annotations'] = annotations

figure = dict(data = data, layout = layout)
iplot(figure)

NOTE: Double click the above graph to zoom out.

  • Miscellaneous - 43.5%
  • Flammables - 33.88%
  • Firearms - 16.73%

Maps¶

Terrorist Attacks by Latitude/Longitude¶

In [57]:
terror_location = GTDB_USA_all[['iyear', 'imonth', 'iday', 'nkill', 'nwound', 'latitude', 'longitude']]
terror_location.rename(columns={'iyear':'Year','imonth':'Month','iday':'Day'},inplace=True)
terror_location['Day'][terror_location.Day == 0] = 1
terror_location['date'] = pd.to_datetime(terror_location[['Day', 'Month', 'Year']])
terror_location
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[57]:
Year Month Day nkill nwound latitude longitude date
0 1970 1 1 0.0 0.0 37.005105 -89.176269 1970-01-01
1 1970 1 2 0.0 0.0 37.791927 -122.225906 1970-01-02
2 1970 1 2 0.0 0.0 43.076592 -89.412488 1970-01-02
3 1970 1 3 0.0 0.0 43.072950 -89.386694 1970-01-03
4 1970 1 1 0.0 0.0 43.468500 -89.744299 1970-01-01
... ... ... ... ... ... ... ... ...
3116 2020 12 13 0.0 0.0 42.115512 -72.539521 2020-12-13
3117 2020 12 13 1.0 0.0 40.694257 -73.930680 2020-12-13
3118 2020 12 15 0.0 0.0 42.115512 -72.539521 2020-12-15
3119 2020 12 25 1.0 3.0 36.171469 -86.784299 2020-12-25
3120 2020 12 28 0.0 0.0 42.115512 -72.539521 2020-12-28

3121 rows × 8 columns

In [58]:
terror_location['text'] = terror_location['date'].dt.strftime('%B %-d, %Y') + '<br>' +\
                     terror_location['nkill'].astype(str) + ' Killed, ' +\
                     terror_location['nwound'].astype(str) + ' Injured'

fatality = dict(
           type = 'scattergeo',
           locationmode = 'USA-states',
           lon = terror_location[terror_location.nkill > 0]['longitude'],
           lat = terror_location[terror_location.nkill > 0]['latitude'],
           text = terror_location[terror_location.nkill > 0]['text'],
           mode = 'markers',
           name = 'Fatalities',
           hoverinfo = 'text+name',
           marker = dict(
               size = terror_location[terror_location.nkill > 0]['nkill'] ** 0.255 * 8,
               opacity = 0.95,
               color = 'rgb(240, 140, 45)')
           )
        
injury = dict(
         type = 'scattergeo',
         locationmode = 'USA-states',
         lon = terror_location[terror_location.nkill == 0]['longitude'],
         lat = terror_location[terror_location.nkill == 0]['latitude'],
         text = terror_location[terror_location.nkill == 0]['text'],
         mode = 'markers',
         name = 'Injuries',
         hoverinfo = 'text+name',
         marker = dict(
             #size = (terror_location[terror_location.nkill == 0]['nwound'] + 1) ** 0.245 * 8,
             opacity = 0.85,
             color = 'rgb(20, 150, 187)')
         )

layout = dict(
         title = 'Terrorist Attacks by Latitude/Longitude in United States (1970-2022)',
         showlegend = True,
         legend = dict(
             x = 0.85, y = 0.4
         ),
         geo = dict(
             scope = 'usa',
             projection = dict(type = 'albers usa'),
             showland = True,
             landcolor = 'rgb(250, 250, 250)',
             subunitwidth = 1,
             subunitcolor = 'rgb(217, 217, 217)',
             countrywidth = 1,
             countrycolor = 'rgb(217, 217, 217)',
             showlakes = True,
             lakecolor = 'rgb(255, 255, 255)')
         )

data = [fatality, injury]
figure = dict(data = data, layout = layout)
iplot(figure)
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/3975754310.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Terrorist Attacks Per Capita¶

In [59]:
us_states = np.asarray(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
                        'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
                        'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
                        'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
                        'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'])
# state population estimates for 2022 from US Census Bureau
state_population = np.asarray([5073187, 738023, 7303398, 3030646, 39995077, 5922618,
                               3612314, 1008350, 707109, 22085563, 10916760, 1474265,
                               1893410, 12808884, 6845874, 3219171, 2954832, 4539130,
                               4682633, 1369159, 6257958, 7126375, 10116069, 5787008,
                               2960075, 6188111, 1103187, 1988536, 3185426, 1389741,
                               9388414, 2129190, 20365879, 10620168, 800394, 11852036,
                               4000953, 4318492, 13062764, 1106341, 5217037, 901165,
                               7023788, 29945493, 3373162, 646545, 8757467, 7901429,
                               1781860, 5935064, 579495])

# terrorist attacks per 100,000 people in state
terror_perstate = np.asarray(GTDB_USA.groupby('State').State.count())
terror_percapita = np.round(terror_perstate / state_population * 100000, 2)
# District of Columbia outlier (1 terrorist attack per 10,000 people) adjusted
terror_percapita[8] = round(terror_percapita[8] / 6, 2)

terror_scale = [[0, 'rgb(252, 232, 213)'], [1, 'rgb(240, 140, 45)']]

data = [dict(
        type = 'choropleth',
        autocolorscale = False,
        colorscale = terror_scale,
        showscale = False,
        locations = us_states,
        locationmode = 'USA-states',
        z = terror_percapita,
        marker = dict(
            line = dict(
                color = 'rgb(255, 255, 255)',
                width = 2)
            )
        )]

layout = dict(
         title = 'Terrorist Attacks per 100,000 People in United States (1970-2022)',
         geo = dict(
             scope = 'usa',
             projection = dict(type = 'albers usa'),
             countrycolor = 'rgb(255, 255, 255)',
             showlakes = True,
             lakecolor = 'rgb(255, 255, 255)')
         )

figure = dict(data = data, layout = layout)
iplot(figure)

Checking For A Common Motive¶

In [60]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
In [61]:
textData = GTDB_USA_all[['motive', 'attacktype1_txt', 'targtype1_txt']]
textData
Out[61]:
motive attacktype1_txt targtype1_txt
0 To protest the Cairo Illinois Police Deparment Armed Assault Police
1 NaN Bombing/Explosion Utilities
2 To protest the War in Vietnam and the draft Facility/Infrastructure Attack Military
3 To protest the War in Vietnam and the draft Facility/Infrastructure Attack Government (General)
4 NaN Bombing/Explosion Military
... ... ... ...
3116 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions
3117 Luis Vasquez, an unaffiliated individual, clai... Hostage Taking (Kidnapping) Religious Figures/Institutions
3118 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions
3119 The specific motive is unknown; however, sourc... Bombing/Explosion Business
3120 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions

3121 rows × 3 columns

In [62]:
textData.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3121 entries, 0 to 3120
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   motive           1691 non-null   object
 1   attacktype1_txt  3121 non-null   object
 2   targtype1_txt    3121 non-null   object
dtypes: object(3)
memory usage: 73.3+ KB
In [63]:
textData = textData.dropna()
textData.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1691 entries, 0 to 3120
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   motive           1691 non-null   object
 1   attacktype1_txt  1691 non-null   object
 2   targtype1_txt    1691 non-null   object
dtypes: object(3)
memory usage: 52.8+ KB
In [64]:
textData = textData[textData.motive.isin(['nan', 'Unknown']) == False]
In [65]:
textData
Out[65]:
motive attacktype1_txt targtype1_txt
0 To protest the Cairo Illinois Police Deparment Armed Assault Police
2 To protest the War in Vietnam and the draft Facility/Infrastructure Attack Military
3 To protest the War in Vietnam and the draft Facility/Infrastructure Attack Government (General)
5 Protest the draft and Vietnam War Facility/Infrastructure Attack Military
7 To protest United States owned businesses in P... Facility/Infrastructure Attack Business
... ... ... ...
3116 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions
3117 Luis Vasquez, an unaffiliated individual, clai... Hostage Taking (Kidnapping) Religious Figures/Institutions
3118 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions
3119 The specific motive is unknown; however, sourc... Bombing/Explosion Business
3120 The specific motive is unknown; however, sourc... Facility/Infrastructure Attack Religious Figures/Institutions

1514 rows × 3 columns

Initialize TfidfVectorizer with desired parameters (default smoothing and normalization)

In [66]:
tfidf_vectorizer = TfidfVectorizer(analyzer='word',stop_words= 'english')

The feature that has the text data we want to work with is 'motive'. We will use that to TfidfVectorizer. Using the fit_transform() method to learn vocabulary and idf. This method returns document-term matrix.

In [67]:
tfidf_vector = tfidf_vectorizer.fit_transform(textData['motive'])
tfidf_vector
Out[67]:
<1514x3305 sparse matrix of type '<class 'numpy.float64'>'
	with 18514 stored elements in Compressed Sparse Row format>
In [68]:
# How many words did we count?
len(tfidf_vectorizer.get_feature_names())
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning:

Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.

Out[68]:
3305
In [69]:
# list the words
tfidf_vectorizer.get_feature_names()[:10] # listing the first 10 only
Out[69]:
['000', '10', '100', '11', '11th', '150', '1619', '17', '18', '19']

Make a DataFrame out of the resulting tf–idf vector, setting the “feature names” or words as columns and the titles as rows

In [70]:
tfidf_df = pd.DataFrame(tfidf_vector.toarray(), columns=tfidf_vectorizer.get_feature_names())
tfidf_df
Out[70]:
000 10 100 11 11th 150 1619 17 18 19 ... yugoslavia zaremski zealand zebra zeldin zero zhang zionist zionists zuniga
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1509 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1510 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1511 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1512 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1513 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

1514 rows × 3305 columns

It is clear that we have a lot of features now. But if we take a closer look at our matrix we can easily tell that it is very sparse with many 0 values.

Let's create some functions to help us with our analysis keeping in mind when we say document here were are referring to an single entry(event) in our dataset.

This function takes a single row of the tf-idf matrix (corresponding to a particular document), and returns the n highest scoring words (or more generally tokens or features):

In [71]:
# Get top n tfidf values in row and return them with their corresponding feature names.
def top_tfidf_feats(row, features, top_n=25):
    topn_ids = np.argsort(row)[::-1][:top_n]
    top_feats = [(features[i], row[i]) for i in topn_ids]
    df = pd.DataFrame(top_feats)
    df.columns = ['feature', 'tfidf']
    return df

Here we use argsort to produce the indices that would order the row by tf-idf value, reverse them (into descending order), and select the first top_n. We then return a pandas DataFrame with the words themselves (feature names) and their corresponding score.

The result of a tf-idf, however, is typically a sparse matrix, which doesn’t support all the usual matrix or array operations. So in order to apply the above function to inspect a particular document, we convert a single row into dense format first:

In [72]:
# Top tfidf features in specific document (matrix row)
def top_feats_in_doc(Xtr, features, row_id, top_n=25):
    row = np.squeeze(Xtr[row_id].toarray())
    return top_tfidf_feats(row, features, top_n)

For example, we can look at the first document we have and use this function to show the top 10 words used in the first document of our matrix.

In [73]:
textData.iloc[0]['motive']
Out[73]:
'To protest the Cairo Illinois Police Deparment'
In [74]:
top_feats_in_doc(tfidf_vector,tfidf_vectorizer.get_feature_names(),0,10)
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning:

Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.

Out[74]:
feature tfidf
0 deparment 0.583594
1 cairo 0.583594
2 illinois 0.443474
3 police 0.307955
4 protest 0.165317
5 faculty 0.000000
6 far 0.000000
7 family 0.000000
8 families 0.000000
9 falsely 0.000000

Calculate the average tf-idf score of all words across a number of documents (in this case all documents), i.e. the average per column of a tf-idf matrix:

In [75]:
def top_mean_feats(Xtr, features, grp_ids=None, min_tfidf=0.1, top_n=25):
    if grp_ids:
        D = Xtr[grp_ids].toarray()
    else:
        D = Xtr.toarray()

    D[D < min_tfidf] = 0
    tfidf_means = np.mean(D, axis=0)
    return top_tfidf_feats(tfidf_means, features, top_n)

Calling this function with grp_ids=None, gives us the most important words across the whole dataset. Here are the top 10:

In [76]:
top_mean_feats(tfidf_vector,tfidf_vectorizer.get_feature_names(),top_n=15)
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning:

Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.

Out[76]:
feature tfidf
0 protest 0.067913
1 abortion 0.063500
2 sabotage 0.044678
3 motive 0.032757
4 practice 0.032416
5 facilities 0.031839
6 operating 0.031658
7 unknown 0.030949
8 specific 0.030446
9 attack 0.028394
10 draft 0.025381
11 united 0.022003
12 police 0.020938
13 states 0.020117
14 war 0.018931

let’s calculate the mean tf-idf scores depending on a event type label. This function returns a list of dataframes, where each dataframe holds top_n features and their mean tfidf value calculated across rows(events) with the same class label.

In [77]:
def top_feats_by_class(Xtr, y, features, min_tfidf=0.1, top_n=25):
    dfs = []
    labels = np.unique(y)
    for label in labels:
        ids = np.where(y==label)
        feats_df = top_mean_feats(Xtr, features, ids, min_tfidf=min_tfidf, top_n=top_n)
        feats_df.label = label
        dfs.append(feats_df)
    return dfs
In [78]:
dfs = top_feats_by_class(tfidf_vector,textData['attacktype1_txt'],tfidf_vectorizer.get_feature_names())
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning:

Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.

Instead of printing the features out as a table, let’s create a figure in matplotlib:

In [79]:
# Plot the data frames returned by the function plot_tfidf_classfeats().
def plot_tfidf_classfeats_h(dfs):
    fig = plt.figure(figsize=(45, 25), facecolor="w")
    x = np.arange(len(dfs[0]))
    for i, df in enumerate(dfs):
        ax = fig.add_subplot(2, 4, i+1)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.set_frame_on(False)
        ax.get_xaxis().tick_bottom()
        ax.get_yaxis().tick_left()
        ax.set_xlabel("Mean Tf-Idf Score", labelpad=16, fontsize=14)
        ax.set_title("label = " + str(df.label), fontsize=16)
        ax.ticklabel_format(axis='x', style='sci', scilimits=(-2,2))
        ax.barh(x, df.tfidf, align='center', color='#005035')
        ax.set_yticks(x)
        ax.set_ylim([-1, x[-1]+1])
        yticks = ax.set_yticklabels(df.feature)
        plt.subplots_adjust(bottom=0.09, right=0.97, left=0.15, top=0.95, wspace=0.52)
    plt.show()
In [80]:
plot_tfidf_classfeats_h(dfs)
In [81]:
#dfs2 = top_feats_by_class(tfidf_vector,textData['targtype1_txt'],tfidf_vectorizer.get_feature_names())
In [82]:
#plot_tfidf_classfeats_h(dfs2)

Modeling¶

In [83]:
Model_data = GTDB_USA[['Month', 'State', 'Attack_Type', 'Target_type', 'Weapon_type', 'success']]
Model_data.head()
Out[83]:
Month State Attack_Type Target_type Weapon_type success
0 1 Illinois 2 3 5 1
1 1 California 3 21 6 1
2 1 Wisconsin 7 4 8 1
3 1 Wisconsin 7 2 8 1
4 1 Wisconsin 3 4 6 0
In [84]:
# create dummy fields before split
# converting categorical features using one-hot encoding (i.e., dummy features)
# textbook approach using statsmodels categorical function is depricated
# we will be using pandas get_dummies function instead
Model_data = pd.get_dummies(Model_data, columns=['State'], prefix = ['dummy'])
In [85]:
print(Model_data.columns)
Index(['Month', 'Attack_Type', 'Target_type', 'Weapon_type', 'success',
       'dummy_Alabama', 'dummy_Alaska', 'dummy_Arizona', 'dummy_Arkansas',
       'dummy_California', 'dummy_Colorado', 'dummy_Connecticut',
       'dummy_Delaware', 'dummy_District of Columbia', 'dummy_Florida',
       'dummy_Georgia', 'dummy_Hawaii', 'dummy_Idaho', 'dummy_Illinois',
       'dummy_Indiana', 'dummy_Iowa', 'dummy_Kansas', 'dummy_Kentucky',
       'dummy_Louisiana', 'dummy_Maine', 'dummy_Maryland',
       'dummy_Massachusetts', 'dummy_Michigan', 'dummy_Minnesota',
       'dummy_Mississippi', 'dummy_Missouri', 'dummy_Montana',
       'dummy_Nebraska', 'dummy_Nevada', 'dummy_New Hampshire',
       'dummy_New Jersey', 'dummy_New Mexico', 'dummy_New York',
       'dummy_North Carolina', 'dummy_North Dakota', 'dummy_Ohio',
       'dummy_Oklahoma', 'dummy_Oregon', 'dummy_Pennsylvania',
       'dummy_Rhode Island', 'dummy_South Carolina', 'dummy_South Dakota',
       'dummy_Tennessee', 'dummy_Texas', 'dummy_Utah', 'dummy_Vermont',
       'dummy_Virginia', 'dummy_Washington', 'dummy_West Virginia',
       'dummy_Wisconsin', 'dummy_Wyoming'],
      dtype='object')
In [86]:
Model_data.head()
Out[86]:
Month Attack_Type Target_type Weapon_type success dummy_Alabama dummy_Alaska dummy_Arizona dummy_Arkansas dummy_California ... dummy_South Dakota dummy_Tennessee dummy_Texas dummy_Utah dummy_Vermont dummy_Virginia dummy_Washington dummy_West Virginia dummy_Wisconsin dummy_Wyoming
0 1 2 3 5 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 3 21 6 1 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
2 1 7 4 8 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
3 1 7 2 8 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
4 1 3 4 6 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0

5 rows × 56 columns

In [87]:
# create training and testing sets at 80/20
Model_data_train, Model_data_test = train_test_split(Model_data, test_size = .2)
In [88]:
# for learning our model we want all columns except 'success'
x = Model_data.drop('success', axis=1)

x_test = Model_data.drop('success', axis=1)
In [89]:
# we want to predict the success let's make that our y
y = Model_data['success']
y_test = Model_data['success']

Balance the Training Set Using the SMOTE Technique¶

SMOTE or Synthetic Minority Oversampling Technique is an oversampling technique but SMOTE working differently than your typical oversampling.

In a classic oversampling technique, the minority data is duplicated from the minority data population. While it increases the number of data, it does not give any new information or variation to the machine learning model.

SMOTE works by utilizing a k-nearest neighbor algorithm to create synthetic data. SMOTE first start by choosing random data from the minority class, then k-nearest neighbors from the data are set. Synthetic data would then be made between the random data and the randomly selected k-nearest neighbor.

In [90]:
sns.set_theme(style="darkgrid")
sns.countplot(y=y, data=Model_data, palette="mako_r")
plt.ylabel('Success')
plt.xlabel('Total')
plt.yticks([0,1], ['Unsuccessful','Successful'])
plt.title('Unbalanced Data')
plt.show()
In [91]:
Model_data['success'].value_counts()
Out[91]:
1    2223
0     460
Name: success, dtype: int64

This graph shows us that our predictor success is heavily skewed towards successful attacks. We have 2223 successful attacks and 460 unsuccessful attacks. In an effort to increase the accuracy of our models we can apply the Smote Balancing technique.

In [92]:
# Over Sample using SMOTE
from imblearn.over_sampling import SMOTE
x, y = SMOTE(k_neighbors=1).fit_resample(x, y)

To run models using under sampling using Cluster Centroids comment out the above SMOTE cell and run the cell below.

In [93]:
# Under sample using Cluster Centroids
#from imblearn.under_sampling import ClusterCentroids
#x, y = ClusterCentroids(random_state=42).fit_resample(x, y)
In [94]:
sns.set_theme(style="darkgrid")
sns.countplot(y=y, data=Model_data, palette="mako_r")
plt.ylabel('Success')
plt.xlabel('Total')
plt.yticks([0,1], ['Unsuccessful','Successful'])
plt.title('Balanced Data')
plt.show()

This shows us that the training set has been balanced to the distribution of successful attacks. After balancing the data approximately 79.3% of the unsuccessful attacks are synthetic data created by the SMOTE Technique, this may result in less accurate models later.

Baseline Model¶

The SimpleClassifier first tries several baseline and instantaneous models, potentially on subsampled data, to get an idea of what a low baseline should be. This again is a good place to surface data leakage, as well as find the main discriminative features in the dataset. https://dabl.github.io/0.2.0/index.html#

In [95]:
#!pip install dabl
import dabl

BaseModel = dabl.SimpleClassifier(random_state=0).fit(Model_data, target_col="success")
Running DummyClassifier(random_state=0)
accuracy: 0.829 average_precision: 0.171 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.453
=== new best DummyClassifier(random_state=0) (using recall_macro):
accuracy: 0.829 average_precision: 0.171 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.453

Running GaussianNB()
accuracy: 0.245 average_precision: 0.237 roc_auc: 0.676 recall_macro: 0.532 f1_macro: 0.238
=== new best GaussianNB() (using recall_macro):
accuracy: 0.245 average_precision: 0.237 roc_auc: 0.676 recall_macro: 0.532 f1_macro: 0.238

Running MultinomialNB()
accuracy: 0.817 average_precision: 0.366 roc_auc: 0.698 recall_macro: 0.565 f1_macro: 0.574
=== new best MultinomialNB() (using recall_macro):
accuracy: 0.817 average_precision: 0.366 roc_auc: 0.698 recall_macro: 0.565 f1_macro: 0.574

Running DecisionTreeClassifier(class_weight='balanced', max_depth=1, random_state=0)
accuracy: 0.579 average_precision: 0.212 roc_auc: 0.600 recall_macro: 0.600 f1_macro: 0.514
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=1, random_state=0) (using recall_macro):
accuracy: 0.579 average_precision: 0.212 roc_auc: 0.600 recall_macro: 0.600 f1_macro: 0.514

Running DecisionTreeClassifier(class_weight='balanced', max_depth=5, random_state=0)
accuracy: 0.624 average_precision: 0.296 roc_auc: 0.660 recall_macro: 0.635 f1_macro: 0.552
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=5, random_state=0) (using recall_macro):
accuracy: 0.624 average_precision: 0.296 roc_auc: 0.660 recall_macro: 0.635 f1_macro: 0.552

Running DecisionTreeClassifier(class_weight='balanced', min_impurity_decrease=0.01,
                       random_state=0)
accuracy: 0.594 average_precision: 0.239 roc_auc: 0.646 recall_macro: 0.628 f1_macro: 0.531
Running LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
                   random_state=0)
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571
=== new best LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
                   random_state=0) (using recall_macro):
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571

Running LogisticRegression(C=1, class_weight='balanced', max_iter=1000, random_state=0)
accuracy: 0.657 average_precision: 0.369 roc_auc: 0.303 recall_macro: 0.656 f1_macro: 0.578

Best model:
LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
                   random_state=0)
Best Scores:
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571

Baseline Results

Best model: LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000, random_state=0) Best Scores: Accuracy: 0.643, Average_precision: 0.357, roc_auc: 0.299, recall_macro: 0.657, f1_macro: 0.571

In [96]:
dabl.explain(BaseModel)

Naive Bayes¶

Naive Bayes is a probabilistic classifier inspired by the Bayes theorem under a simple assumption which is the attributes are conditionally independent.

The classification is conducted by deriving the maximum posterior which is the maximal P(Ci|X) with the above assumption applying to Bayes theorem. This assumption greatly reduces the computational cost by only counting the class distribution. Even though the assumption is not valid in most cases since the attributes are dependent, surprisingly Naive Bayes has able to perform impressively.

Naive Bayes is a very simple algorithm to implement and good results have obtained in most cases. It can be easily scalable to larger datasets since it takes linear time, rather than by expensive iterative approximation as used for many other types of classifiers.

Naive Bayes can suffer from a problem called the zero probability problem. When the conditional probability is zero for a particular attribute, it fails to give a valid prediction. This needs to be fixed explicitly using a Laplacian estimator.

In [97]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

# Let's try a Naive Bayes classifier
nb_model = MultinomialNB()
nb_model.fit(x, y)
 
y_pred = nb_model.predict(x_test)
 
# Summary of the predictions made by the classifier
print(classification_report(y_test, y_pred))
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))

# Accuracy score
NBAcc = accuracy_score(y_pred,y_test)
print('accuracy is', NBAcc)

test_predictions_proba = nb_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)

# If our target is a numerical value we can also check MSE and RMSE.  
# NOTE - This will not work here and cause an error since our pedictions are categorical
# mean_squared_error = mean_squared_error(y_test, predictions)
# mean_absolute_error = mean_absolute_error(y_test, predictions)
# result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}
              precision    recall  f1-score   support

           0       0.22      0.62      0.33       460
           1       0.88      0.56      0.68      2223

    accuracy                           0.57      2683
   macro avg       0.55      0.59      0.51      2683
weighted avg       0.76      0.57      0.62      2683

Confusion Matrix
[[ 284  176]
 [ 979 1244]]
accuracy is 0.569511740588893
Log Loss: 0.6457
==============================
In [98]:
# contingency table of the actual and predicted score text

y_predicted = nb_model.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
Out[98]:
Predicted 0 1 Total
Actual
0 284 176 460
1 979 1244 2223
Total 1263 1420 2683
In [99]:
from sklearn.metrics import plot_confusion_matrix

plot_confusion_matrix(nb_model, x_test, y_test) 
plt.grid(visible=False)
plt.show()
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning:

Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.

In [100]:
# Native Bayes using Grid Search
from sklearn.model_selection import GridSearchCV

params = {'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]}
multinomial_nb_grid = GridSearchCV(MultinomialNB(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
multinomial_nb_grid.fit(x,y)
print('Train Accuracy : %.3f'%multinomial_nb_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%multinomial_nb_grid.best_estimator_.score(x_test, y_test))
GS_NBAcc = multinomial_nb_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_NBAcc)
print('Best Parameters : ',multinomial_nb_grid.best_params_)
Fitting 10 folds for each of 5 candidates, totalling 50 fits
Train Accuracy : 0.671
Test Accuracy : 0.571
Best Accuracy Through Grid Search : 0.645
Best Parameters :  {'alpha': 0.01}

Decision Tree¶

Decision Trees observes features of an object and trains a model in the structure of a tree to predict data in the future to produce meaningful continuous output. Continuous output means that the output/result is not discrete, i.e., it is not represented just by a discrete, known set of numbers or values

In [101]:
# Let's try a decision tree model

from sklearn.tree import DecisionTreeClassifier

dt01 = DecisionTreeClassifier(criterion="gini").fit(x,y)
prediction_dt01 = dt01.predict(x_test)

# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_dt01))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_dt01))
# Accuracy score
DTAcc = accuracy_score(prediction_dt01,y_test)
print('accuracy is',DTAcc)

test_predictions_proba = dt01.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
              precision    recall  f1-score   support

           0       0.71      0.92      0.80       460
           1       0.98      0.92      0.95      2223

    accuracy                           0.92      2683
   macro avg       0.85      0.92      0.88      2683
weighted avg       0.94      0.92      0.93      2683

Confusion Matrix
[[ 424   36]
 [ 174 2049]]
accuracy is 0.9217294073797987
Log Loss: 0.1326
==============================
In [102]:
# contingency table of the actual and predicted score text

y_predicted = dt01.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
Out[102]:
Predicted 0 1 Total
Actual
0 424 36 460
1 174 2049 2223
Total 598 2085 2683

Feature Importance of Decision Tree

In [103]:
fi = dt01.feature_importances_ #feature importance array
fi = pd.Series(data = fi, index = x.columns) #convert to Pandas series for plotting
fi.sort_values(ascending=False, inplace=True) #sort descending
In [104]:
#create bar plot
plt.figure(figsize=(20, 20))
chart = sns.barplot(x=fi, y=fi.index, palette=sns.color_palette("viridis_r", n_colors=len(fi)))
chart.set_xticklabels(chart.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.show()
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2019358956.py:4: UserWarning:

FixedFormatter should only be used together with FixedLocator

In [105]:
# Decision Tree using Grid Search

params = {'max_depth': [3, 5, 10, 20, 30],
'criterion': ["gini", "entropy"],
'splitter': ["best", "random"]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
dt_grid.fit(x,y)
print('Train Accuracy : %.3f'%dt_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%dt_grid.best_estimator_.score(x_test, y_test))
GS_DTAcc = dt_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_DTAcc)
print('Best Parameters : ',dt_grid.best_params_)
Fitting 10 folds for each of 20 candidates, totalling 200 fits
Train Accuracy : 0.928
Test Accuracy : 0.892
Best Accuracy Through Grid Search : 0.781
Best Parameters :  {'criterion': 'gini', 'max_depth': 30, 'splitter': 'best'}

Random Forrest¶

Random forest is a commonly-used machine learning algorithm, which combines the output of multiple decision trees to reach a single result. Its ease of use and flexibility have fueled its adoption, as it handles both classification and regression problems.

In [106]:
# Let's try a Random Forest model

from sklearn.ensemble import RandomForestClassifier
rf01 = RandomForestClassifier(n_estimators = 100,criterion="gini").fit(x,y)
prediction_rf01 = rf01.predict(x_test)

# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_rf01))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_rf01))
# Accuracy score
RFAcc = accuracy_score(prediction_rf01,y_test)
print('accuracy is', RFAcc)

test_predictions_proba = rf01.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
              precision    recall  f1-score   support

           0       0.73      0.88      0.80       460
           1       0.97      0.93      0.95      2223

    accuracy                           0.92      2683
   macro avg       0.85      0.91      0.87      2683
weighted avg       0.93      0.92      0.93      2683

Confusion Matrix
[[ 406   54]
 [ 153 2070]]
accuracy is 0.9228475587029444
Log Loss: 0.2031
==============================
In [107]:
# contingency table of the actual and predicted score text

y_predicted = rf01.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
Out[107]:
Predicted 0 1 Total
Actual
0 406 54 460
1 153 2070 2223
Total 559 2124 2683
In [108]:
def plot_confusion_matrix(Y_test, Y_preds):
    conf_mat = confusion_matrix(Y_test, Y_preds)
    #print(conf_mat)
    fig = plt.figure(figsize=(6,6))
    plt.matshow(conf_mat, cmap=plt.cm.Blues, fignum=1)
    plt.yticks(range(2), range(2))
    plt.xticks(range(2), range(2))
    plt.colorbar()
    plt.grid(visible=False);
    for i in range(2):
        for j in range(2):
            plt.text(i-0.2,j+0.1, str(conf_mat[j, i]), color='tab:red')

plot_confusion_matrix(y_test, rf01.predict(x_test))
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/3873633094.py:8: MatplotlibDeprecationWarning:

Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first.

In [109]:
# Random Forrest using Grid Search

params = {'max_depth': [3, 5, 10, 20, 30],
'n_estimators' : [10, 100],
'criterion': ["gini", "entropy", "log_loss"]}
rf_grid = GridSearchCV(RandomForestClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
rf_grid.fit(x,y)
print('Train Accuracy : %.3f'%rf_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%rf_grid.best_estimator_.score(x_test, y_test))
GS_RFAcc = rf_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_RFAcc)
print('Best Parameters : ',rf_grid.best_params_)
Fitting 10 folds for each of 30 candidates, totalling 300 fits
Train Accuracy : 0.937
Test Accuracy : 0.907
Best Accuracy Through Grid Search : 0.809
Best Parameters :  {'criterion': 'gini', 'max_depth': 30, 'n_estimators': 100}

Support Vector Machine¶

Support Vector Machine, SVM, can be used for both regression and classification tasks. But, it is widely used in classification objectives. The objective of the support vector machine algorithm is to find a hyperplane in an N-dimensional space(N — the number of features) that distinctly classifies the data points.

To separate the two classes of data points, there are many possible hyperplanes that could be chosen. Our objective is to find a plane that has the maximum margin, i.e the maximum distance between data points of both classes. Maximizing the margin distance provides some reinforcement so that future data points can be classified with more confidence.

In [110]:
# Let's try a Support Vector Machine model

from sklearn import svm
svm_model = svm.SVC(decision_function_shape='ovo', probability=True).fit(x,y)
prediction_svm = svm_model.predict(x_test)

# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_svm))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_svm))

# Accuracy score
SVMAcc = accuracy_score(prediction_svm,y_test)
print('accuracy is', SVMAcc)

test_predictions_proba = svm_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
              precision    recall  f1-score   support

           0       0.22      0.74      0.34       460
           1       0.90      0.47      0.62      2223

    accuracy                           0.52      2683
   macro avg       0.56      0.61      0.48      2683
weighted avg       0.78      0.52      0.57      2683

Confusion Matrix
[[ 342  118]
 [1181 1042]]
accuracy is 0.5158404770778979
Log Loss: 0.6491
==============================
In [111]:
# contingency table of the actual and predicted score text

y_predicted = svm_model.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
Out[111]:
Predicted 0 1 Total
Actual
0 342 118 460
1 1181 1042 2223
Total 1523 1160 2683
In [112]:
# SVM using Grid Search

params = {'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
'decision_function_shape' : ['ovr', 'ovo']}
svm_grid = GridSearchCV(svm.SVC(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
svm_grid.fit(x,y)
print('Train Accuracy : %.3f'%svm_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%svm_grid.best_estimator_.score(x_test, y_test))
GS_SVMAcc = svm_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_SVMAcc)
print('Best Parameters : ',svm_grid.best_params_)
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Train Accuracy : 0.711
Test Accuracy : 0.717
Best Accuracy Through Grid Search : 0.694
Best Parameters :  {'decision_function_shape': 'ovr', 'kernel': 'linear'}

Neural Network¶

Multi-layer Perceptron (MLP) is a supervised learning algorithm that learns a function by training on a dataset. Given a set of features and a target , it can learn a non-linear function approximator for either classification or regression. It is different from logistic regression, in that between the input and the output layer, there can be one or more non-linear layers, called hidden layers.

In [113]:
# Let's try a Neural Network model

from sklearn.neural_network import MLPClassifier

NeuralNetwork_model = MLPClassifier(solver='lbfgs', alpha=1e-5, max_iter=1000, hidden_layer_sizes=(5, 2), random_state=1).fit(x,y)
prediction_NeuralNetwork = NeuralNetwork_model.predict(x_test)

# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_NeuralNetwork))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_NeuralNetwork))

# Accuracy score
NNAcc = accuracy_score(prediction_NeuralNetwork,y_test)
print('accuracy is', NNAcc)

test_predictions_proba = NeuralNetwork_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
              precision    recall  f1-score   support

           0       0.24      0.32      0.28       460
           1       0.85      0.79      0.82      2223

    accuracy                           0.71      2683
   macro avg       0.55      0.56      0.55      2683
weighted avg       0.75      0.71      0.73      2683

Confusion Matrix
[[ 148  312]
 [ 457 1766]]
accuracy is 0.7133805441669773
Log Loss: 0.538
==============================
In [114]:
# Neural Network using Grid Search

params = {'solver' : ['lbfgs', 'adam'],
'learning_rate_init': [0.0001],
'max_iter': [1000],
'hidden_layer_sizes': [(50, 40, 30, 20, 10)],
'activation': ['logistic'],
'alpha': [0.0001, 0.001, 0.005],
'early_stopping': [True, False]}
NeuralNetwork_grid = GridSearchCV(MLPClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
NeuralNetwork_grid.fit(x,y)
print('Train Accuracy : %.3f'%NeuralNetwork_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%NeuralNetwork_grid.best_estimator_.score(x_test, y_test))
GS_NNAcc = NeuralNetwork_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_NNAcc)
print('Best Parameters : ',NeuralNetwork_grid.best_params_)
Fitting 10 folds for each of 12 candidates, totalling 120 fits
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names
  warnings.warn(
Train Accuracy : 0.538
Test Accuracy : 0.425
Best Accuracy Through Grid Search : 0.505
Best Parameters :  {'activation': 'logistic', 'alpha': 0.005, 'early_stopping': True, 'hidden_layer_sizes': (50, 40, 30, 20, 10), 'learning_rate_init': 0.0001, 'max_iter': 1000, 'solver': 'lbfgs'}

Model Comparison¶

In [115]:
compare = pd.DataFrame({'Model': ['Decision Tree', 'Random Forest', 'Naive Bayes', 'SVM', 'Neural Network'], 
                        'Accuracy': [DTAcc*100, RFAcc*100, NBAcc*100, SVMAcc*100, NNAcc*100]})

# round to two decimal places and sort
compare = compare.sort_values(by='Accuracy', ascending=False)
compare = compare.round(decimals = 2)
compare
Out[115]:
Model Accuracy
1 Random Forest 92.28
0 Decision Tree 92.17
4 Neural Network 71.34
2 Naive Bayes 56.95
3 SVM 51.58
In [116]:
#pip install Kaleido
In [117]:
# Use Plotly and Kaleido to save table as png
import plotly.figure_factory as ff
import kaleido

fig = ff.create_table(compare)
fig.update_layout(
    autosize=False,
    width=300,
    height=150,
    )

fig.write_image("SMOTEModelTable.png", scale=1.5)
In [118]:
sns.set_theme(style="darkgrid")
sns.barplot(data=compare.sort_values(by='Accuracy', ascending=False), x='Model', y='Accuracy', palette="viridis_r")
plt.ylabel('Accuracy Percentage', fontsize=18, weight='bold')
plt.xlabel('Model', fontsize=18, weight='bold')
plt.title('SMOTE Model Accuracy', fontsize=26, weight='bold')
plt.ylim(1,100)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.show()
In [119]:
GS_compare = pd.DataFrame({'Model': ['Decision Tree', 'Random Forest', 'Naive Bayes', 'SVM', 'Neural Network'], 
                        'Accuracy': [GS_DTAcc*100, GS_RFAcc*100, GS_NBAcc*100, GS_SVMAcc*100, GS_NNAcc*100]})

# round to two decimal places in python pandas
#GS_compare.options.display.float_format = '{:.2f}'.format

GS_compare = GS_compare.sort_values(by='Accuracy', ascending=False)
GS_compare = GS_compare.round(decimals = 2)
GS_compare
Out[119]:
Model Accuracy
1 Random Forest 80.91
0 Decision Tree 78.05
3 SVM 69.35
2 Naive Bayes 64.54
4 Neural Network 50.45
In [120]:
# Use Plotly and Kaleido to save table as png
fig = ff.create_table(GS_compare)
fig.update_layout(
    autosize=False,
    width=300,
    height=150,
    )

fig.write_image("SMOTEGSModelTable.png", scale=1.5)
In [121]:
sns.set_theme(style="darkgrid")
sns.barplot(data=GS_compare.sort_values(by='Accuracy', ascending=False), x='Model', y='Accuracy', palette="viridis_r")
plt.ylabel('Accuracy Percentage', fontsize=18, weight='bold')
plt.xlabel('Model', fontsize=18, weight='bold')
plt.title('SMOTE Grid Search Model Accuracy', fontsize=26, weight='bold')
plt.ylim(1,100)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.show()

Created in deepnote.com Created in Deepnote